// UKPVSEthnicity

* Close any log left open by an earlier (possibly aborted) run; otherwise
* -log using- stops with an error because a log is already open.
capture log close
log using UKPVSEthnicity.log, replace

* Data source: Prosser, Magasin, Proulx and Haddock, UK Progressive Values Dataset,
* September 2024. Accessed on March 20 2025.
* -clear- discards whatever is currently in memory so the load does not abort
* when unsaved data are present.
use "C:\Users\sbstjp\OneDrive - Cardiff University\UKPVS.dta", clear

// Demographics
* Recode out-of-range answers to missing and rename variables.
* gender codes 3-5 become missing; every other value is left as it is.
* NOTE(review): the remaining codes are presumably 1 = male, 2 = female, so
* FemaleGender is a 1/2 variable rather than a 0/1 dummy - confirm against the codebook.
recode gender (3/5 = .)
rename gender FemaleGender
* 999 in Household_Income is treated as a missing-value code.
mvdecode Household_Income, mv(999)

* Indicator variables.
* Each one is 1 for the target category, 0 for the other listed categories,
* and missing for any value outside the listed codes (including missing input).

* Graduate: Education codes 16-18 versus codes 1-15.
gen Graduate = cond(inrange(Education, 16, 18), 1, cond(inrange(Education, 1, 15), 0, .))

* Black: Ethnicity2 code 3 versus codes 1, 2, 4, 5.
gen Black = cond(Ethnicity2 == 3, 1, cond(inlist(Ethnicity2, 1, 2, 4, 5), 0, .))

* Asian: Ethnicity2 code 2 versus code 1 and the range 3-5.
gen Asian = cond(Ethnicity2 == 2, 1, cond(Ethnicity2 == 1 | inrange(Ethnicity2, 3, 5), 0, .))

* Multiracial: Ethnicity2 code 4 versus code 5 and the range 1-3.
gen Multiracial = cond(Ethnicity2 == 4, 1, cond(Ethnicity2 == 5 | inrange(Ethnicity2, 1, 3), 0, .))

* White: Ethnicity2 code 1 versus the range 2-5.
gen White = cond(Ethnicity2 == 1, 1, cond(inrange(Ethnicity2, 2, 5), 0, .))

// Create a weight
* Uniform starting weight; the raking step below adjusts it.
gen weight = 1

* Band age into five groups - the other raking variables are already categorical.
recode ageNew ///
    (min/24=1 "0-24") (25/34=2 "25-34") (35/44=3 "35-44") (45/54=4 "45-54") (55/max=5 "55+"), ///
    generate(age_group)

* Target shares for each raking dimension. They are based on BESW29, used instead
* of census data because BESW29 includes political self-identification.
* Each *tot variable holds the target share of the respondent's own category and
* stays missing when the respondent falls outside the listed categories.

* Sex: the same share for FemaleGender 1 (male) and 2 (female).
gen sextot = 0.50 if inlist(FemaleGender, 1, 2)

* Age: shares for age_group 1-5 (0-24, 25-34, 35-44, 45-54, 55+), in that order.
gen agetot = .
local agetargets 0.13 0.14 0.19 0.19 0.35
forvalues band = 1/5 {
    local target : word `band' of `agetargets'
    replace agetot = `target' if age_group == `band'
}

* Ethnicity: shares for Ethnicity2 1-4 (White, Asian, Black, Mixed), in that order.
* NOTE(review): Ethnicity2 == 5 receives no target, so ethtot is missing for those
* respondents - confirm how survwgt rake treats them.
gen ethtot = .
local ethtargets 0.86 0.06 0.04 0.04
forvalues code = 1/4 {
    local target : word `code' of `ethtargets'
    replace ethtot = `target' if Ethnicity2 == `code'
}

* Education bands: 1 = university diploma and below (codes 1-15),
* 2 = undergraduate degree (code 16), 3 = postgraduate and above (codes 17-18).
gen edcats = cond(inrange(Education, 1, 15), 1, ///
             cond(Education == 16, 2, ///
             cond(inrange(Education, 17, 18), 3, .)))

gen edtot = .
local edtargets 0.4740 0.2969 0.2291
forvalues band = 1/3 {
    local target : word `band' of `edtargets'
    replace edtot = `target' if edcats == `band'
}

* Household income bands: 1 = under 30k (codes 1-6), 2 = 30-60k (codes 7-11),
* 3 = 60-100k (codes 12, 13), 4 = over 100k (codes 14, 15).
gen inccats = cond(inrange(Household_Income, 1, 6), 1, ///
              cond(inrange(Household_Income, 7, 11), 2, ///
              cond(inlist(Household_Income, 12, 13), 3, ///
              cond(inlist(Household_Income, 14, 15), 4, .))))

* NOTE(review): these four shares sum to 0.9999 rather than 1 - confirm rounding.
gen inctot = .
local inctargets 0.3806 0.3524 0.1922 0.0747
forvalues band = 1/4 {
    local target : word `band' of `inctargets'
    replace inctot = `target' if inccats == `band'
}

* Rake the base weight to the target shares with the survwgt package;
* the result is stored in rakedweight.
survwgt rake weight, ///
    by(FemaleGender age_group Ethnicity2 edcats inccats) ///
    totvars(sextot agetot ethtot edtot inctot) ///
    generate(rakedweight)

// Standardize variables
* Standardized versions of age and household income, via egen's std() function.
egen Age = std(ageNew)
egen Income = std(Household_Income)

* Keep the original item under the lower-case name pvs and rebuild PVS on a
* 1-2 scale, so it matches the other dependent variables in the book.
* The formula maps 1 to 1 and 7 to 2 (assumes PVS is on a 1-7 scale - TODO confirm).
rename PVS pvs
gen PVS = 1 + (pvs - 1) / 6

// Regressions
* Drop estimates stored by earlier runs in this session, so that esttab
* tabulates only the four models estimated below.
eststo clear

* One weighted regression of PVS per ethnicity indicator, each with the same
* controls (Age, FemaleGender, Graduate, Income). Every model is stored for
* the combined table.
foreach group in White Black Asian Multiracial {
    regress PVS Age FemaleGender Graduate Income `group' [pweight = rakedweight], robust
    eststo
}

* Side-by-side table of the stored models.
esttab

// Correlations
* Weighted pairwise correlations between PVS and the ethnicity indicators,
* with significance levels.
pwcorr PVS White Black Asian Multiracial [aweight = rakedweight], sig

log close
